library(RMongo)
## Loading required package: rJava
library(ggplot2)
library(plyr)
library(reshape2)
library(knitr)
Load data
# Open a connection to the "classification_cross_validation" database on
# localhost, port 27017.
cross_validation <- mongoDbConnect("classification_cross_validation", "localhost", 27017)
# Query the "performance" collection with an empty filter ('{}').
# NOTE(review): the trailing 0, 0 are presumably RMongo's skip and limit
# arguments - confirm that a limit of 0 returns every document.
performance <- dbGetQuery(cross_validation, "performance", '{}', 0, 0)
Keep only one half of the data, which is symmetric with regard to AUC
# juged.data <- pr.complete[which(pr.complete$criteria %in% c('juged_bad', 'juged_good')), ]
# interact_length.data <- pr.complete[which(pr.complete$criteria %in% c('short_interactions', 'long_interactions')), ]
# real_simulated.data <- pr.complete[which(pr.complete$criteria %in% c('real', 'simulated')), ]
# success.data <- pr.complete[which(pr.complete$criteria %in% c('task_failed', 'task_successful')), ]
# word_accuracy.data <- pr.complete[which(pr.complete$criteria %in% c('word_accuracy_100', 'word_accuracy_60')), ]
# simulation_quality.data <- pr.complete[which(pr.complete$criteria %in% c('simulation_quality_best', 'simulation_quality_worst')), ]
# real_vs_worst_sim.data <- pr.complete[which(pr.complete$criteria %in% c('real_vs_simulated_worst', 'simulated_worst_vs_real')), ]
# Criteria to keep: one member of each pair listed in the commented-out
# selections above. The order here is the order of the groups in `cutted`.
kept_criteria <- c(
  'juged_bad',
  'short_interactions',
  'real',
  'task_failed',
  'word_accuracy_100',
  'simulation_quality_best',
  'real_vs_simulated_worst'
)
# `%in%` never yields NA, so rows with a missing criteria are dropped here.
# Indexing with `performance$criteria == '...'` would instead insert an
# all-NA row for every missing criteria value.
kept_rows <- performance[performance$criteria %in% kept_criteria, ]
# Stable ordering by position in `kept_criteria`: groups appear in the listed
# order and rows keep their original relative order within each group.
cutted <- kept_rows[order(match(kept_rows$criteria, kept_criteria)), ]
# Set names for criteria
# Lookup from criteria key to the label stored in `criteria_name`.
criteria_labels <- c(
  juged_bad = 'user jugedment',
  short_interactions = 'dialogue length',
  real = 'real vs simulated',
  task_failed = 'task success',
  word_accuracy_100 = 'word accuracy',
  simulation_quality_best = 'real vs. simulated (good)',
  real_vs_simulated_worst = 'real vs. simulated (bad)'
)
# Position of each row's criteria in the lookup; NA when there is no label.
label_index <- match(cutted$criteria, names(criteria_labels))
has_label <- !is.na(label_index)
# Rows without a known criteria keep the placeholder string 'NA'.
criteria_name <- rep('NA', nrow(cutted))
# One vectorised assignment: unlike `cutted[which(...), ]$criteria_name <- ...`
# this does not fail when a criteria has no rows in `cutted`.
criteria_name[has_label] <- criteria_labels[label_index[has_label]]
cutted$criteria_name <- criteria_name
Histogram of AUC for all 2688 scenarios.
Mean: 0.6595527
Median: 0.6086348
# Theme tweak shared by all plots: x axis labels rotated by 90 degrees.
vertical_x_labels <- theme(axis.text.x = element_text(angle = 90))

# Rows of `cutted` whose classifier_name is 'rank order'; `%in%` leaves out
# rows with a missing classifier_name, just like which(... == ...).
rank_order_runs <- cutted[cutted$classifier_name %in% 'rank order', ]

# AUC per criteria name, colored by classifier.
ggplot(cutted, aes(x = criteria_name, y = auc, color = classifier_name)) +
  geom_point() +
  vertical_x_labels

# 'rank order' rows only, colored by n_gram_size.
ggplot(rank_order_runs, aes(x = criteria_name, y = auc, color = factor(n_gram_size))) +
  geom_point() +
  vertical_x_labels

# 'rank order' rows only, colored by frequency_threshold.
ggplot(rank_order_runs, aes(x = criteria_name, y = auc, color = factor(frequency_threshold))) +
  geom_point() +
  vertical_x_labels

# 'rank order' rows only, colored by smoothing_value.
ggplot(rank_order_runs, aes(x = criteria_name, y = auc, color = factor(smoothing_value))) +
  geom_point() +
  vertical_x_labels
Get scenarios with AUC = 1.
# Select the rows of `cutted` whose auc is exactly 1. `%in%` is never NA, so
# rows with a missing auc are left out.
p <- cutted[cutted$auc %in% 1, ]
# Disabled alternative that queries the database directly:
#dbGetQuery(cross_validation, "performance", "{auc: 1}", 0, 0)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.